Pros and Cons Analysis 23-03-25

Author

Celestin Okoroji

Preamble

This document was created to address our recent email chain. This document has not been internally Q&A and should be considered draft.

The Variables of Interest

First let us construct a subset of the main data set only containing ‘the questions where we ask people if they think they see better / worse / no impact on various T&Cs as a result of being outsourced’ and some demographics (we assume here that the questions being referred to are the ‘pros and cons’ vars because these have matching response options as described.

There are 14 of these variables to which we add a few demographic variables for use later on (Sex, Age, Ethnicity_Collapsed, Region, Education_Band, Annual_Income, Income_Group, outsourcing group)

Code for Nerds
library(tidyverse)

# Read data
data <- read_csv("data/2025-01-21 - clean_data_jrf_experiential.csv")

# reduced dataset
data_processed <- data %>%
  mutate(across(where(is.character), as.factor)) %>%
  select(
    starts_with("pro"),
    Sex, Age, Ethnicity_Collapsed, Region, Education_Band,
    Annual_Income, Income_Group, OutsourcedNonOL, BORNUK
  )

Next, since the data we are interested in is categorical and has already been cross tabulated elsewhere, we will transform the data into a numerical form so we can do some other interesting analysis.

Code for Nerds
# Function to transform categorical responses to numeric values
recode_pros_cons <- function(x) {
  case_when(
    grepl("more|better|easier", tolower(x)) ~ 1,
    grepl("less|harder|worse", tolower(x)) ~ -1,
    grepl("neither|no impact", tolower(x)) ~ 0,
    grepl("don't know", tolower(x)) ~ NA_real_,
    # Default case
    TRUE ~ NA_real_
  )
}

# Process the data
data_processed <- data %>%
  mutate(across(starts_with("Pros_And_Cons"), 
                ~ recode_pros_cons(as.character(.)),
                .names = "{.col}_numeric")) %>%
  # Select both original and new numeric columns
  select(
    # Original variables
    starts_with("pro"),
    # Recoded numeric variables
    contains("_numeric"),
    Sex, Age, Ethnicity, Ethnicity_Collapsed, Region, Education_Band,
    Annual_Income, Income_Group, OutsourcedNonOL, BORNUK
  )

Next lets create a basic visualisation of the data. As you will notice most responses seem to be positive or neutral. Its relevant to note that some of the variable have 0 negative responses (over 1800+ outsourced respondents). This seems to indicate that by and large outsourced workers consider that being outsourced has a limited negative impact on the areas covered by the variables. I’ve added correlations here for completeness though there a no really strong correlations, and only a few moderate ones.

Code for Nerds
# Load visualization packages
library(ggplot2)
library(tidyr)
library(forcats)

# Create stacked bar chart for all Pros_And_Cons variables
pros_cons_plot <- data_processed %>%
  select(contains("_numeric")) %>%
  # Convert to long format for plotting
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "response"
  ) %>%
  # Clean variable names for display
  mutate(
    variable = gsub("Pros_And_Cons_|_numeric", "", variable),
    variable = gsub("_", " ", variable),
    response_label = case_when(
      response == 1 ~ "Positive",
      response == 0 ~ "Neutral",
      response == -1 ~ "Negative",
      is.na(response) ~ "Don't know"
    )
  ) %>%
  # Count responses for each variable and response type
  count(variable, response, response_label) %>%
  group_by(variable) %>%
  mutate(percentage = n / sum(n) * 100) %>%
  mutate(response_label = factor(response_label, 
                               levels = c("Negative", "Neutral", "Positive", "Don't know")))

# Plot stacked bar chart
ggplot(pros_cons_plot, aes(x = variable, y = percentage, fill = response_label)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_manual(values = c("Negative" = "#E74C3C", 
                              "Neutral" = "#F1C40F", 
                              "Positive" = "#2ECC71", 
                              "Don't know" = "#BDC3C7")) +
  coord_flip() +
  labs(
    title = "Pros and Cons: Response Distribution",
    x = NULL,
    y = "Percentage (%)",
    fill = "Response"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

Code for Nerds
# Create a diverging bar chart to better visualize positive/negative distribution
diverging_plot <- pros_cons_plot %>%
  # Filter out Don't know responses
  filter(!is.na(response)) %>%
  # Create values for diverging bars
  mutate(
    plot_value = ifelse(response == 0, 0,
                       ifelse(response == 1, percentage, -percentage))
  )

# Plot diverging bar chart
ggplot(diverging_plot, aes(x = variable, y = plot_value, fill = response_label)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(values = c("Negative" = "#E74C3C", 
                              "Neutral" = "#F1C40F", 
                              "Positive" = "#2ECC71")) +
  coord_flip() +
  labs(
    title = "Pros and Cons: Positive vs Negative Responses",
    subtitle = "Negative values on left, positive values on right",
    x = NULL,
    y = "Percentage (%)",
    fill = "Response"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom") +
  geom_hline(yintercept = 0, color = "black", linewidth = 0.5)

Code for Nerds
# Create a correlation heatmap to see relationships between variables
correlation_plot <- data_processed %>%
  select(contains("_numeric")) %>%
  cor(use = "pairwise.complete.obs") %>%
  as.data.frame() %>%
  rownames_to_column("variable1") %>%
  pivot_longer(-variable1, names_to = "variable2", values_to = "correlation") %>%
  mutate(
    variable1 = gsub("Pros_And_Cons_|_numeric", "", variable1),
    variable2 = gsub("Pros_And_Cons_|_numeric", "", variable2)
  )

# Plot correlation heatmap
ggplot(correlation_plot, aes(x = variable1, y = variable2, fill = correlation)) +
  geom_tile() +
  scale_fill_gradient2(low = "#E74C3C", mid = "white", high = "#2ECC71", midpoint = 0) +
  geom_text(aes(label = round(correlation, 2)), color = "black", size = 3) +
  labs(
    title = "Correlation Between Pros and Cons Variables",
    x = NULL,
    y = NULL,
    fill = "Correlation"
  ) +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank()
  )


Means

Next we explore the mean values for each of the Pros_and_Cons variables. Looking at the variables this way confirms that overall the sentiment towards these questions overall appears to be positive

Code for Nerds
# Calculate means for each Pros_And_Cons variable
means_with_sd <- data_processed %>%
  # Select only the numeric versions
  select(contains("_numeric")) %>%
  # Calculate mean and standard error for each variable
  summarise(across(everything(), 
                   list(
                     mean = ~ mean(.x, na.rm = TRUE),
                     se = ~ sd(.x, na.rm = TRUE) / sqrt(sum(!is.na(.x)))
                   ))) %>%
  # Convert to long format
  pivot_longer(
    cols = everything(),
    names_to = c("variable", ".value"),
    names_pattern = "(.*)_(mean|se)"
  ) %>%
  # Clean variable names
  mutate(
    variable = gsub("Pros_And_Cons_|_numeric", "", variable),
    variable = gsub("_", " ", variable),
    # Create factor for sorting
    variable = fct_reorder(variable, mean)
  )

# Create plot with error bars
means_with_error_plot <- ggplot(means_with_sd, 
                               aes(x = variable, y = mean)) +
  geom_bar(stat = "identity", 
           aes(fill = mean > 0),
           width = 0.7) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), 
                width = 0.2) +
  scale_fill_manual(values = c("TRUE" = "#2ECC71", "FALSE" = "#E74C3C")) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  coord_flip() +
  labs(
    title = "Average Response by Category with Standard Error",
    subtitle = "Values range from -1 (negative) to +1 (positive)",
    x = NULL,
    y = "Mean Value"
  ) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_y_continuous(limits = c(-1, 1), 
                     breaks = seq(-1, 1, 0.25))

# Display the plot with error bars
means_with_error_plot

Thematic Means

We were asked to look at these variables together as themes. To do so we grouped the variables in the way they were set out in the emails. Again these show that there are differing levels of positivity but nevertheless, it is interesting to note promotion and progression and enforcement coming in as the least positive

Code for Nerds
# Define the themes and their corresponding variables
themes <- list(
  "Secure work and incomes" = c(
    "Pay", "Hours", "Job security"
  ),
  "Terms and conditions" = c(
    "Holiday leave", "Flexibility", "Terms and conditions"
  ),
  "Progression and opportunity" = c(
    "Progress", "Training", "Specialise"
  ),
  "Enforcement" = c(
    "Rights", "Health safety"
  ),
  "Connections and relationships" = c(
    "Treatment", "Connected", "Invested"
  )
)

# Prepare the data with theme information
themed_data <- data_processed %>%
  # Select only the numeric versions
  select(contains("_numeric")) %>%
  # Create a mapping from column names to themes
  pivot_longer(
    cols = everything(),
    names_to = "variable",
    values_to = "value"
  ) %>%
  # Clean variable names
  mutate(
    clean_var = gsub("Pros_And_Cons_|_numeric", "", variable),
    clean_var = gsub("_", " ", clean_var)
  ) %>%
  # Add theme classification
  mutate(theme = case_when(
    grepl("Pay", clean_var, ignore.case = TRUE) ~ "Secure work and incomes",
    grepl("Hours", clean_var, ignore.case = TRUE) ~ "Secure work and incomes",
    grepl("security", clean_var, ignore.case = TRUE) ~ "Secure work and incomes",
    
    grepl("Holiday", clean_var, ignore.case = TRUE) ~ "Terms and conditions",
    grepl("Flex", clean_var, ignore.case = TRUE) ~ "Terms and conditions",
    grepl("Terms", clean_var, ignore.case = TRUE) ~ "Terms and conditions",
    
    grepl("Progress|Promotion", clean_var, ignore.case = TRUE) ~ "Progression and opportunity",
    grepl("Training|Development", clean_var, ignore.case = TRUE) ~ "Progression and opportunity",
    grepl("Special", clean_var, ignore.case = TRUE) ~ "Progression and opportunity",
    
    grepl("Rights", clean_var, ignore.case = TRUE) ~ "Enforcement",
    grepl("Health|Safety", clean_var, ignore.case = TRUE) ~ "Enforcement",
    
    grepl("Treatment", clean_var, ignore.case = TRUE) ~ "Connections and relationships",
    grepl("Connect", clean_var, ignore.case = TRUE) ~ "Connections and relationships",
    grepl("Invest", clean_var, ignore.case = TRUE) ~ "Connections and relationships",
    
    TRUE ~ "Other"
  ))

# Calculate means by theme
theme_means <- themed_data %>%
  filter(!is.na(value)) %>%
  group_by(theme) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    se = sd(value, na.rm = TRUE) / sqrt(n()),
    n = n()
  ) %>%
  filter(theme != "Other") %>%
  # Create factor for sorting
  mutate(theme = fct_reorder(theme, mean))

# Create plot with error bars
theme_means_error_plot <- ggplot(theme_means, 
                                aes(x = theme, y = mean)) +
  geom_bar(stat = "identity", 
           aes(fill = mean > 0),
           width = 0.7) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), 
                width = 0.2) +
  scale_fill_manual(values = c("TRUE" = "#2ECC71", "FALSE" = "#E74C3C")) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  coord_flip() +
  labs(
    title = "Average Response by Theme with Standard Error",
    subtitle = "Values range from -1 (negative) to +1 (positive)",
    x = NULL,
    y = "Mean Value",
    caption = paste("Error bars represent standard error of the mean")
  ) +
  theme_minimal() +
  theme(legend.position = "none") +
  scale_y_continuous(limits = c(-1, 1), 
                     breaks = seq(-1, 1, 0.25))

# Display plot
theme_means_error_plot

Code for Nerds
# Calculate means for individual variables but organized by theme
variable_means_by_theme <- themed_data %>%
  filter(!is.na(value), theme != "Other") %>%
  group_by(theme, clean_var) %>%
  summarise(
    mean = mean(value, na.rm = TRUE),
    se = sd(value, na.rm = TRUE) / sqrt(n()),
    n = n(),
    .groups = "drop"
  ) %>%
  # Create ordered factors
  group_by(theme) %>%
  mutate(clean_var = fct_reorder(clean_var, mean)) %>%
  ungroup() %>%
  mutate(theme = factor(theme, levels = levels(theme_means$theme)))

# Create a plot showing variables within themes
themed_variables_plot <- ggplot(variable_means_by_theme, 
                               aes(x = clean_var, y = mean, fill = theme)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_errorbar(aes(ymin = mean - se, ymax = mean + se), width = 0.2) +
  facet_grid(theme ~ ., scales = "free_y", space = "free_y") +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  coord_flip() +
  labs(
    title = "Average Response by Variable within Themes",
    subtitle = "Values range from -1 (negative) to +1 (positive)",
    x = NULL,
    y = "Mean Value"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    strip.text = element_text(face = "bold"),
    panel.spacing = unit(1, "lines")
  ) +
  scale_y_continuous(limits = c(-1, 1), 
                     breaks = seq(-1, 1, 0.5))

# Display the faceted plot
themed_variables_plot

Basic Regression (2 vars)

Now, to my point about ‘over-claiming’ its useful to understand whether variations in responses to these question are usefully explained by other variables that seem to have a relationship to outsourcing. Namely, Gender and Ethnicity. We explore this through a simple regression model.

In relation to security (e.g. pay and hours etc) being a woman was associated with lower scores and this result is significant. Moreover, Being of Black/ African/Caribbean/Black British origin was associated with higher scores and this result was significant. Similar results are found across all themes with slight variations.

Interestingly, in the ‘connections’ theme all minority groups have significantly higher scores compared to the White baseline. Potentially indicating higher exposure to similar others (re: homophily).

Note

In a forest plot, if the confidence interval or the point estimate crosses 0, the result is not statistically significant.

Code for Nerds
# Prepare data for regression analysis
# First, let's create a dataset with demographic variables and theme means per respondent
regression_data <- data_processed %>%
  # Select demographic variables and numeric pros/cons variables
  select(
    # Demographic variables (adjust column names to match your actual data)
    Sex, Ethnicity_Collapsed,
    # All of the recoded variables
    contains("_numeric")
  ) %>%
  # Calculate mean score for each theme per respondent
  mutate(
    # Create theme scores for each respondent by averaging relevant variables
    theme_secure = rowMeans(select(., contains("Pay_numeric"), contains("Hours_numeric"), contains("security_numeric")), na.rm = TRUE),
    theme_terms = rowMeans(select(., contains("Holiday_numeric"), contains("Flex_numeric"), contains("Terms_numeric")), na.rm = TRUE),
    theme_progression = rowMeans(select(., contains("Progress_numeric"), contains("Training_numeric"), contains("Special_numeric")), na.rm = TRUE),
    theme_enforcement = rowMeans(select(., contains("Rights_numeric"), contains("Health_numeric"), contains("Safety_numeric")), na.rm = TRUE),
    theme_connections = rowMeans(select(., contains("Treatment_numeric"), contains("Connect_numeric"), contains("Invest_numeric")), na.rm = TRUE)
  )%>%
    mutate(across(where(is.character), as.factor))

#relevel
regression_data$Sex<-relevel(regression_data$Sex, ref = "Male")
regression_data$Ethnicity_Collapsed<- relevel(regression_data$Ethnicity_Collapsed, ref = "White")

# Run regression models for each theme
model_secure <- lm(theme_secure ~ Sex + Ethnicity_Collapsed, regression_data)
model_terms <- lm(theme_terms ~ Sex + Ethnicity_Collapsed, data = regression_data)
model_progression <- lm(theme_progression ~ Sex + Ethnicity_Collapsed, data = regression_data)
model_enforcement <- lm(theme_enforcement ~ Sex + Ethnicity_Collapsed, data = regression_data)
model_connections <- lm(theme_connections ~ Sex + Ethnicity_Collapsed, data = regression_data)

# Display summary of one model
summary(model_secure)

Call:
lm(formula = theme_secure ~ Sex + Ethnicity_Collapsed, data = regression_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2639 -0.2303 -0.1142  0.4028  0.9401 

Coefficients:
                                                          Estimate Std. Error
(Intercept)                                                0.16770    0.01892
SexFemale                                                 -0.05346    0.02609
SexOther                                                   0.46992    0.54097
SexPrefer not to say                                       0.16563    0.53952
Ethnicity_CollapsedArab                                    0.23403    0.19125
Ethnicity_CollapsedAsian/Asian British                     0.02905    0.04492
Ethnicity_CollapsedBlack/ African/Caribbean/Black British  0.09618    0.03871
Ethnicity_CollapsedMixed/Multiple ethnic groups            0.06258    0.05950
Ethnicity_CollapsedOther ethnic group                      0.12035    0.24162
Ethnicity_CollapsedPrefer not to say                      -0.05433    0.12162
                                                          t value Pr(>|t|)    
(Intercept)                                                 8.863   <2e-16 ***
SexFemale                                                  -2.049   0.0406 *  
SexOther                                                    0.869   0.3851    
SexPrefer not to say                                        0.307   0.7589    
Ethnicity_CollapsedArab                                     1.224   0.2212    
Ethnicity_CollapsedAsian/Asian British                      0.647   0.5180    
Ethnicity_CollapsedBlack/ African/Caribbean/Black British   2.485   0.0131 *  
Ethnicity_CollapsedMixed/Multiple ethnic groups             1.052   0.2930    
Ethnicity_CollapsedOther ethnic group                       0.498   0.6185    
Ethnicity_CollapsedPrefer not to say                       -0.447   0.6551    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5392 on 1747 degrees of freedom
  (57 observations deleted due to missingness)
Multiple R-squared:  0.007901,  Adjusted R-squared:  0.00279 
F-statistic: 1.546 on 9 and 1747 DF,  p-value: 0.1265
Code for Nerds
summary(model_terms)

Call:
lm(formula = theme_terms ~ Sex + Ethnicity_Collapsed, data = regression_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.4052 -0.1491 -0.1108  0.3892  1.1365 

Coefficients:
                                                          Estimate Std. Error
(Intercept)                                                0.13387    0.01960
SexFemale                                                 -0.02303    0.02707
SexOther                                                  -0.14913    0.55816
SexPrefer not to say                                      -1.13387    0.55666
Ethnicity_CollapsedArab                                    0.19015    0.19734
Ethnicity_CollapsedAsian/Asian British                     0.01526    0.04650
Ethnicity_CollapsedBlack/ African/Caribbean/Black British  0.27131    0.04034
Ethnicity_CollapsedMixed/Multiple ethnic groups            0.13045    0.06140
Ethnicity_CollapsedOther ethnic group                     -0.24735    0.27862
Ethnicity_CollapsedPrefer not to say                       0.03920    0.13214
                                                          t value Pr(>|t|)    
(Intercept)                                                 6.831 1.16e-11 ***
SexFemale                                                  -0.851   0.3950    
SexOther                                                   -0.267   0.7894    
SexPrefer not to say                                       -2.037   0.0418 *  
Ethnicity_CollapsedArab                                     0.964   0.3354    
Ethnicity_CollapsedAsian/Asian British                      0.328   0.7429    
Ethnicity_CollapsedBlack/ African/Caribbean/Black British   6.726 2.37e-11 ***
Ethnicity_CollapsedMixed/Multiple ethnic groups             2.124   0.0338 *  
Ethnicity_CollapsedOther ethnic group                      -0.888   0.3748    
Ethnicity_CollapsedPrefer not to say                        0.297   0.7668    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5563 on 1726 degrees of freedom
  (78 observations deleted due to missingness)
Multiple R-squared:  0.03077,   Adjusted R-squared:  0.02572 
F-statistic: 6.089 on 9 and 1726 DF,  p-value: 1.839e-08
Code for Nerds
summary(model_progression)

Call:
lm(formula = theme_progression ~ Sex + Ethnicity_Collapsed, data = regression_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.37350 -0.34048 -0.09284  0.87414  1.00598 

Coefficients:
                                                           Estimate Std. Error
(Intercept)                                                0.125862   0.026418
SexFemale                                                 -0.033024   0.036419
SexOther                                                  -0.027042   0.742005
SexPrefer not to say                                       0.874138   0.739958
Ethnicity_CollapsedArab                                    0.515650   0.262332
Ethnicity_CollapsedAsian/Asian British                    -0.098820   0.062763
Ethnicity_CollapsedBlack/ African/Caribbean/Black British  0.247641   0.053730
Ethnicity_CollapsedMixed/Multiple ethnic groups            0.061229   0.082566
Ethnicity_CollapsedOther ethnic group                      0.287348   0.331399
Ethnicity_CollapsedPrefer not to say                      -0.002387   0.180849
                                                          t value Pr(>|t|)    
(Intercept)                                                 4.764 2.06e-06 ***
SexFemale                                                  -0.907   0.3646    
SexOther                                                   -0.036   0.9709    
SexPrefer not to say                                        1.181   0.2376    
Ethnicity_CollapsedArab                                     1.966   0.0495 *  
Ethnicity_CollapsedAsian/Asian British                     -1.574   0.1156    
Ethnicity_CollapsedBlack/ African/Caribbean/Black British   4.609 4.35e-06 ***
Ethnicity_CollapsedMixed/Multiple ethnic groups             0.742   0.4584    
Ethnicity_CollapsedOther ethnic group                       0.867   0.3860    
Ethnicity_CollapsedPrefer not to say                       -0.013   0.9895    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7395 on 1689 degrees of freedom
  (115 observations deleted due to missingness)
Multiple R-squared:  0.01903,   Adjusted R-squared:  0.0138 
F-statistic: 3.641 on 9 and 1689 DF,  p-value: 0.0001619
Code for Nerds
summary(model_enforcement)

Call:
lm(formula = theme_enforcement ~ Sex + Ethnicity_Collapsed, data = regression_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.3025 -0.2025 -0.1132  0.3868  0.8868 

Coefficients:
                                                           Estimate Std. Error
(Intercept)                                                0.113169   0.019936
SexFemale                                                  0.005622   0.027463
SexOther                                                  -0.196868   0.567715
SexPrefer not to say                                       0.886831   0.566200
Ethnicity_CollapsedArab                                    0.509020   0.200716
Ethnicity_CollapsedAsian/Asian British                     0.083700   0.047163
Ethnicity_CollapsedBlack/ African/Caribbean/Black British  0.183699   0.040723
Ethnicity_CollapsedMixed/Multiple ethnic groups            0.103089   0.062783
Ethnicity_CollapsedOther ethnic group                      0.284582   0.253569
Ethnicity_CollapsedPrefer not to say                       0.010426   0.127643
                                                          t value Pr(>|t|)    
(Intercept)                                                 5.676 1.61e-08 ***
SexFemale                                                   0.205   0.8378    
SexOther                                                   -0.347   0.7288    
SexPrefer not to say                                        1.566   0.1175    
Ethnicity_CollapsedArab                                     2.536   0.0113 *  
Ethnicity_CollapsedAsian/Asian British                      1.775   0.0761 .  
Ethnicity_CollapsedBlack/ African/Caribbean/Black British   4.511 6.89e-06 ***
Ethnicity_CollapsedMixed/Multiple ethnic groups             1.642   0.1008    
Ethnicity_CollapsedOther ethnic group                       1.122   0.2619    
Ethnicity_CollapsedPrefer not to say                        0.082   0.9349    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5658 on 1737 degrees of freedom
  (67 observations deleted due to missingness)
Multiple R-squared:  0.01797,   Adjusted R-squared:  0.01288 
F-statistic: 3.532 on 9 and 1737 DF,  p-value: 0.0002367
Code for Nerds
summary(model_connections)

Call:
lm(formula = theme_connections ~ Sex + Ethnicity_Collapsed, data = regression_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.25528 -0.15944 -0.04988 -0.04401  1.28823 

Coefficients:
                                                          Estimate Std. Error
(Intercept)                                                0.04988    0.02263
SexFemale                                                 -0.00587    0.03127
SexOther                                                  -0.06056    0.63650
SexPrefer not to say                                      -0.04988    0.63472
Ethnicity_CollapsedArab                                   -0.33225    0.24049
Ethnicity_CollapsedAsian/Asian British                     0.01068    0.05413
Ethnicity_CollapsedBlack/ African/Caribbean/Black British  0.11543    0.04643
Ethnicity_CollapsedMixed/Multiple ethnic groups            0.20539    0.07200
Ethnicity_CollapsedOther ethnic group                      0.20305    0.31769
Ethnicity_CollapsedPrefer not to say                      -0.04816    0.15498
                                                          t value Pr(>|t|)   
(Intercept)                                                 2.204  0.02765 * 
SexFemale                                                  -0.188  0.85110   
SexOther                                                   -0.095  0.92421   
SexPrefer not to say                                       -0.079  0.93737   
Ethnicity_CollapsedArab                                    -1.382  0.16730   
Ethnicity_CollapsedAsian/Asian British                      0.197  0.84363   
Ethnicity_CollapsedBlack/ African/Caribbean/Black British   2.486  0.01300 * 
Ethnicity_CollapsedMixed/Multiple ethnic groups             2.853  0.00439 **
Ethnicity_CollapsedOther ethnic group                       0.639  0.52282   
Ethnicity_CollapsedPrefer not to say                       -0.311  0.75604   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6343 on 1682 degrees of freedom
  (122 observations deleted due to missingness)
Multiple R-squared:  0.009426,  Adjusted R-squared:  0.004125 
F-statistic: 1.778 on 9 and 1682 DF,  p-value: 0.06767
Code for Nerds
# Create a function to extract coefficients from all models
extract_coefficients <- function(model, theme_name) {
  coef_data <- broom::tidy(model, conf.int = TRUE) %>%
    filter(term != "(Intercept)") %>%
    mutate(theme = theme_name)
  return(coef_data)
}

# Combine coefficients from all models
all_coefficients <- bind_rows(
  extract_coefficients(model_secure, "Secure work and incomes"),
  extract_coefficients(model_terms, "Terms and conditions"),
  extract_coefficients(model_progression, "Progression and opportunity"),
  extract_coefficients(model_enforcement, "Enforcement"),
  extract_coefficients(model_connections, "Connections and relationships")
)

# Visualize regression coefficients
coefficient_plot <- ggplot(all_coefficients, 
                         aes(x = term, y = estimate, color = theme)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                position = position_dodge(width = 0.5),
                width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  coord_flip() +
  facet_wrap(~ theme) +
  labs(
    title = "Effect of Demographics on Theme Scores",
    subtitle = "Regression coefficients with 95% confidence intervals",
    x = NULL,
    y = "Estimated Effect",
    caption = "Reference categories: Sex = Male, Ethnicity = White"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    strip.text = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )

# Display the coefficient plot
coefficient_plot

Code for Nerds
# Alternative visualization: Forest plot of all coefficients
forest_plot <- ggplot(all_coefficients, 
                     aes(x = estimate, y = interaction(theme, term), 
                        xmin = conf.low, xmax = conf.high, color = theme)) +
  geom_point(size = 2) +
  geom_errorbarh(height = 0.2) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  labs(
    title = "Demographic Effects on Work Experience Themes",
    subtitle = "Regression coefficients with 95% confidence intervals",
    x = "Estimated Effect",
    y = NULL,
    caption = "Reference categories: Sex = Male, Ethnicity = White"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    panel.grid.minor = element_blank()
  )

# Display the forest plot
forest_plot

Regression (More Vars)

Finally, we consider a wider array of demographic variables and how they explain scores on these questions (Sex, Ethnicity_Collapsed, Age, Region, Education_Band, Income_Group and outsourcing group).

Generally speaking, adding these variables did not change the results above. However, as found elsewhere in this project - Age remains a significant negative predictor across the most outcomes of interest e.g. as a person gets older the less positively they respond these questions. This finding seems intuitive to me, as one gets older the more ‘stability’ (as opposed to flexibility) matters.

There also seems to be a clear effect of being less well educated, such that for people who are in the low/mid education band they seem to respond more negatively compared with the high educated. This could also be an effect of pay (?), in most cases being ‘Not low paid’ has no significant relationship with the outcomes of interest.

Code for Nerds
# Prepare data for regression analysis
# First, let's create a dataset with demographic variables and theme means per respondent
regression_data <- data_processed %>%
  # Select demographic variables and numeric pros/cons variables
  select(
    # Demographic variables (adjust column names to match your actual data)
    Sex, Ethnicity_Collapsed, Age, Region, Education_Band, Income_Group, OutsourcedNonOL,
    # All of the recoded variables
    contains("_numeric")
  ) %>%
  # Calculate mean score for each theme per respondent
  mutate(
    # Create theme scores for each respondent by averaging relevant variables
    theme_secure = rowMeans(select(., contains("Pay_numeric"), contains("Hours_numeric"), contains("security_numeric")), na.rm = TRUE),
    theme_terms = rowMeans(select(., contains("Holiday_numeric"), contains("Flex_numeric"), contains("Terms_numeric")), na.rm = TRUE),
    theme_progression = rowMeans(select(., contains("Progress_numeric"), contains("Training_numeric"), contains("Special_numeric")), na.rm = TRUE),
    theme_enforcement = rowMeans(select(., contains("Rights_numeric"), contains("Health_numeric"), contains("Safety_numeric")), na.rm = TRUE),
    theme_connections = rowMeans(select(., contains("Treatment_numeric"), contains("Connect_numeric"), contains("Invest_numeric")), na.rm = TRUE)
  ) %>%
    mutate(across(where(is.character), as.factor))

#relevel
regression_data$Sex<-relevel(regression_data$Sex, ref = "Male")
regression_data$Ethnicity_Collapsed<- relevel(regression_data$Ethnicity_Collapsed, ref = "White")

# Run regression models for each theme
model_secure <- lm(theme_secure ~ Sex + Ethnicity_Collapsed+ Age+ Region+ Education_Band+ Income_Group+ OutsourcedNonOL, regression_data)

model_terms <- lm(theme_terms ~ Sex + Ethnicity_Collapsed+ Age+ Region+ Education_Band+ Income_Group+ OutsourcedNonOL, data = regression_data)
model_progression <- lm(theme_progression ~ Sex + Ethnicity_Collapsed+ Age+ Region+ Education_Band+ Income_Group+ OutsourcedNonOL, data = regression_data)
model_enforcement <- lm(theme_enforcement ~ Sex + Ethnicity_Collapsed+ Age+ Region+ Education_Band+ Income_Group+ OutsourcedNonOL, data = regression_data)
model_connections <- lm(theme_connections ~ Sex + Ethnicity_Collapsed+ Age+ Region+ Education_Band+ Income_Group+ OutsourcedNonOL, data = regression_data)

# Display summary of one model
summary(model_secure)

Call:
lm(formula = theme_secure ~ Sex + Ethnicity_Collapsed + Age + 
    Region + Education_Band + Income_Group + OutsourcedNonOL, 
    data = regression_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.35818 -0.29357 -0.03248  0.39550  1.05974 

Coefficients:
                                                           Estimate Std. Error
(Intercept)                                                0.300382   0.071203
SexFemale                                                 -0.051535   0.026881
SexOther                                                   0.358411   0.537622
SexPrefer not to say                                       0.241915   0.537857
Ethnicity_CollapsedArab                                    0.154655   0.190998
Ethnicity_CollapsedAsian/Asian British                    -0.036448   0.046703
Ethnicity_CollapsedBlack/ African/Caribbean/Black British  0.017401   0.040578
Ethnicity_CollapsedMixed/Multiple ethnic groups            0.016844   0.060755
Ethnicity_CollapsedOther ethnic group                      0.037623   0.242479
Ethnicity_CollapsedPrefer not to say                      -0.086416   0.128237
Age                                                       -0.004261   0.001066
RegionEast of England                                     -0.047428   0.066286
RegionLondon                                               0.098451   0.056344
RegionNorth East                                          -0.070035   0.078359
RegionNorth West                                           0.007207   0.059779
RegionNorthern Ireland                                    -0.095266   0.107900
RegionScotland                                            -0.059963   0.071712
RegionSouth East                                          -0.048824   0.059642
RegionSouth West                                          -0.046897   0.067750
RegionWales                                                0.075867   0.081260
RegionWest Midlands                                        0.039688   0.065513
RegionYorkshire and the Humber                            -0.003491   0.068073
Education_BandLow                                         -0.056497   0.048821
Education_BandMid                                         -0.069059   0.030539
Income_GroupNot low                                        0.082217   0.032739
OutsourcedNonOLgroup 2 agency and long term                0.030365   0.037332
OutsourcedNonOLgroup 3 5 or 6 indicators and long term     0.032354   0.046347
                                                          t value Pr(>|t|)    
(Intercept)                                                 4.219 2.59e-05 ***
SexFemale                                                  -1.917   0.0554 .  
SexOther                                                    0.667   0.5051    
SexPrefer not to say                                        0.450   0.6529    
Ethnicity_CollapsedArab                                     0.810   0.4182    
Ethnicity_CollapsedAsian/Asian British                     -0.780   0.4352    
Ethnicity_CollapsedBlack/ African/Caribbean/Black British   0.429   0.6681    
Ethnicity_CollapsedMixed/Multiple ethnic groups             0.277   0.7816    
Ethnicity_CollapsedOther ethnic group                       0.155   0.8767    
Ethnicity_CollapsedPrefer not to say                       -0.674   0.5005    
Age                                                        -3.998 6.65e-05 ***
RegionEast of England                                      -0.716   0.4744    
RegionLondon                                                1.747   0.0808 .  
RegionNorth East                                           -0.894   0.3716    
RegionNorth West                                            0.121   0.9041    
RegionNorthern Ireland                                     -0.883   0.3774    
RegionScotland                                             -0.836   0.4032    
RegionSouth East                                           -0.819   0.4131    
RegionSouth West                                           -0.692   0.4889    
RegionWales                                                 0.934   0.3506    
RegionWest Midlands                                         0.606   0.5447    
RegionYorkshire and the Humber                             -0.051   0.9591    
Education_BandLow                                          -1.157   0.2473    
Education_BandMid                                          -2.261   0.0239 *  
Income_GroupNot low                                         2.511   0.0121 *  
OutsourcedNonOLgroup 2 agency and long term                 0.813   0.4161    
OutsourcedNonOLgroup 3 5 or 6 indicators and long term      0.698   0.4852    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5352 on 1670 degrees of freedom
  (117 observations deleted due to missingness)
Multiple R-squared:  0.04082,   Adjusted R-squared:  0.02588 
F-statistic: 2.733 on 26 and 1670 DF,  p-value: 6.431e-06
Code for Nerds
summary(model_terms)

Call:
lm(formula = theme_terms ~ Sex + Ethnicity_Collapsed + Age + 
    Region + Education_Band + Income_Group + OutsourcedNonOL, 
    data = regression_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.42263 -0.25255 -0.06702  0.40178  1.14590 

Coefficients:
                                                           Estimate Std. Error
(Intercept)                                                0.275949   0.074266
SexFemale                                                 -0.021903   0.027957
SexOther                                                  -0.232825   0.556213
SexPrefer not to say                                      -1.089624   0.556461
Ethnicity_CollapsedArab                                    0.128045   0.197616
Ethnicity_CollapsedAsian/Asian British                    -0.022737   0.048494
Ethnicity_CollapsedBlack/ African/Caribbean/Black British  0.201714   0.042587
Ethnicity_CollapsedMixed/Multiple ethnic groups            0.089687   0.062878
Ethnicity_CollapsedOther ethnic group                     -0.305044   0.279779
Ethnicity_CollapsedPrefer not to say                       0.100473   0.140419
Age                                                       -0.004948   0.001111
RegionEast of England                                      0.043345   0.068974
RegionLondon                                               0.079199   0.058804
RegionNorth East                                           0.100064   0.082032
RegionNorth West                                           0.050198   0.062334
RegionNorthern Ireland                                     0.108272   0.111813
RegionScotland                                             0.016766   0.074453
RegionSouth East                                          -0.051327   0.062104
RegionSouth West                                           0.082321   0.070832
RegionWales                                                0.164147   0.084691
RegionWest Midlands                                        0.005346   0.067820
RegionYorkshire and the Humber                             0.112959   0.071164
Education_BandLow                                         -0.115471   0.051559
Education_BandMid                                         -0.085549   0.031816
Income_GroupNot low                                        0.058763   0.034140
OutsourcedNonOLgroup 2 agency and long term                0.016975   0.038967
OutsourcedNonOLgroup 3 5 or 6 indicators and long term     0.026504   0.048120
                                                          t value Pr(>|t|)    
(Intercept)                                                 3.716 0.000209 ***
SexFemale                                                  -0.783 0.433469    
SexOther                                                   -0.419 0.675570    
SexPrefer not to say                                       -1.958 0.050383 .  
Ethnicity_CollapsedArab                                     0.648 0.517109    
Ethnicity_CollapsedAsian/Asian British                     -0.469 0.639240    
Ethnicity_CollapsedBlack/ African/Caribbean/Black British   4.737 2.36e-06 ***
Ethnicity_CollapsedMixed/Multiple ethnic groups             1.426 0.153950    
Ethnicity_CollapsedOther ethnic group                      -1.090 0.275739    
Ethnicity_CollapsedPrefer not to say                        0.716 0.474387    
Age                                                        -4.452 9.07e-06 ***
RegionEast of England                                       0.628 0.529817    
RegionLondon                                                1.347 0.178223    
RegionNorth East                                            1.220 0.222705    
RegionNorth West                                            0.805 0.420762    
RegionNorthern Ireland                                      0.968 0.333021    
RegionScotland                                              0.225 0.821861    
RegionSouth East                                           -0.826 0.408663    
RegionSouth West                                            1.162 0.245319    
RegionWales                                                 1.938 0.052771 .  
RegionWest Midlands                                         0.079 0.937180    
RegionYorkshire and the Humber                              1.587 0.112637    
Education_BandLow                                          -2.240 0.025252 *  
Education_BandMid                                          -2.689 0.007242 ** 
Income_GroupNot low                                         1.721 0.085399 .  
OutsourcedNonOLgroup 2 agency and long term                 0.436 0.663175    
OutsourcedNonOLgroup 3 5 or 6 indicators and long term      0.551 0.581861    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5537 on 1649 degrees of freedom
  (138 observations deleted due to missingness)
Multiple R-squared:  0.06335,   Adjusted R-squared:  0.04858 
F-statistic:  4.29 on 26 and 1649 DF,  p-value: 4.398e-12
Code for Nerds
summary(model_progression)

Call:
lm(formula = theme_progression ~ Sex + Ethnicity_Collapsed + 
    Age + Region + Education_Band + Income_Group + OutsourcedNonOL, 
    data = regression_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.54554 -0.32560 -0.02376  0.72119  1.22133 

Coefficients:
                                                           Estimate Std. Error
(Intercept)                                                0.240103   0.099779
SexFemale                                                 -0.036057   0.037742
SexOther                                                  -0.122693   0.742286
SexPrefer not to say                                       1.003497   0.742629
Ethnicity_CollapsedArab                                    0.450494   0.263738
Ethnicity_CollapsedAsian/Asian British                    -0.171595   0.065546
Ethnicity_CollapsedBlack/ African/Caribbean/Black British  0.151600   0.056702
Ethnicity_CollapsedMixed/Multiple ethnic groups           -0.010310   0.084868
Ethnicity_CollapsedOther ethnic group                      0.163987   0.334866
Ethnicity_CollapsedPrefer not to say                       0.019790   0.193952
Age                                                       -0.005694   0.001503
RegionEast of England                                      0.052099   0.093123
RegionLondon                                               0.178879   0.078807
RegionNorth East                                           0.036339   0.110473
RegionNorth West                                           0.130603   0.083383
RegionNorthern Ireland                                    -0.129263   0.151229
RegionScotland                                            -0.006283   0.100238
RegionSouth East                                           0.096312   0.083433
RegionSouth West                                           0.108876   0.094676
RegionWales                                                0.243187   0.112494
RegionWest Midlands                                        0.209318   0.090936
RegionYorkshire and the Humber                             0.210197   0.094929
Education_BandLow                                         -0.065743   0.069533
Education_BandMid                                         -0.119755   0.042950
Income_GroupNot low                                        0.057525   0.046081
OutsourcedNonOLgroup 2 agency and long term                0.064258   0.052361
OutsourcedNonOLgroup 3 5 or 6 indicators and long term    -0.022788   0.064889
                                                          t value Pr(>|t|)    
(Intercept)                                                 2.406 0.016225 *  
SexFemale                                                  -0.955 0.339539    
SexOther                                                   -0.165 0.868736    
SexPrefer not to say                                        1.351 0.176796    
Ethnicity_CollapsedArab                                     1.708 0.087808 .  
Ethnicity_CollapsedAsian/Asian British                     -2.618 0.008929 ** 
Ethnicity_CollapsedBlack/ African/Caribbean/Black British   2.674 0.007579 ** 
Ethnicity_CollapsedMixed/Multiple ethnic groups            -0.121 0.903320    
Ethnicity_CollapsedOther ethnic group                       0.490 0.624406    
Ethnicity_CollapsedPrefer not to say                        0.102 0.918740    
Age                                                        -3.789 0.000157 ***
RegionEast of England                                       0.559 0.575925    
RegionLondon                                                2.270 0.023350 *  
RegionNorth East                                            0.329 0.742245    
RegionNorth West                                            1.566 0.117474    
RegionNorthern Ireland                                     -0.855 0.392815    
RegionScotland                                             -0.063 0.950029    
RegionSouth East                                            1.154 0.248520    
RegionSouth West                                            1.150 0.250317    
RegionWales                                                 2.162 0.030782 *  
RegionWest Midlands                                         2.302 0.021473 *  
RegionYorkshire and the Humber                              2.214 0.026950 *  
Education_BandLow                                          -0.945 0.344550    
Education_BandMid                                          -2.788 0.005362 ** 
Income_GroupNot low                                         1.248 0.212091    
OutsourcedNonOLgroup 2 agency and long term                 1.227 0.219920    
OutsourcedNonOLgroup 3 5 or 6 indicators and long term     -0.351 0.725498    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.7388 on 1614 degrees of freedom
  (173 observations deleted due to missingness)
Multiple R-squared:  0.04653,   Adjusted R-squared:  0.03117 
F-statistic: 3.029 on 26 and 1614 DF,  p-value: 5.116e-07
Code for Nerds
summary(model_enforcement)

Call:
lm(formula = theme_enforcement ~ Sex + Ethnicity_Collapsed + 
    Age + Region + Education_Band + Income_Group + OutsourcedNonOL, 
    data = regression_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.3433 -0.2260 -0.1012  0.3839  1.1381 

Coefficients:
                                                           Estimate Std. Error
(Intercept)                                                0.169508   0.075881
SexFemale                                                  0.003371   0.028509
SexOther                                                  -0.214431   0.568268
SexPrefer not to say                                       0.867818   0.568508
Ethnicity_CollapsedArab                                    0.472808   0.201880
Ethnicity_CollapsedAsian/Asian British                     0.047222   0.049379
Ethnicity_CollapsedBlack/ African/Caribbean/Black British  0.152877   0.043019
Ethnicity_CollapsedMixed/Multiple ethnic groups            0.088485   0.064608
Ethnicity_CollapsedOther ethnic group                      0.264218   0.256321
Ethnicity_CollapsedPrefer not to say                       0.046391   0.135404
Age                                                       -0.001758   0.001133
RegionEast of England                                      0.021503   0.070519
RegionLondon                                               0.006845   0.060143
RegionNorth East                                          -0.028572   0.083207
RegionNorth West                                           0.001839   0.063873
RegionNorthern Ireland                                    -0.049873   0.115888
RegionScotland                                             0.020376   0.076236
RegionSouth East                                          -0.125223   0.063597
RegionSouth West                                          -0.018806   0.072092
RegionWales                                                0.037448   0.086265
RegionWest Midlands                                        0.044717   0.069467
RegionYorkshire and the Humber                             0.083692   0.072394
Education_BandLow                                         -0.039243   0.051778
Education_BandMid                                         -0.033865   0.032432
Income_GroupNot low                                        0.047103   0.034602
OutsourcedNonOLgroup 2 agency and long term                0.040202   0.039547
OutsourcedNonOLgroup 3 5 or 6 indicators and long term    -0.076374   0.049325
                                                          t value Pr(>|t|)    
(Intercept)                                                 2.234  0.02562 *  
SexFemale                                                   0.118  0.90590    
SexOther                                                   -0.377  0.70597    
SexPrefer not to say                                        1.526  0.12708    
Ethnicity_CollapsedArab                                     2.342  0.01930 *  
Ethnicity_CollapsedAsian/Asian British                      0.956  0.33905    
Ethnicity_CollapsedBlack/ African/Caribbean/Black British   3.554  0.00039 ***
Ethnicity_CollapsedMixed/Multiple ethnic groups             1.370  0.17100    
Ethnicity_CollapsedOther ethnic group                       1.031  0.30278    
Ethnicity_CollapsedPrefer not to say                        0.343  0.73194    
Age                                                        -1.552  0.12094    
RegionEast of England                                       0.305  0.76046    
RegionLondon                                                0.114  0.90941    
RegionNorth East                                           -0.343  0.73135    
RegionNorth West                                            0.029  0.97703    
RegionNorthern Ireland                                     -0.430  0.66700    
RegionScotland                                              0.267  0.78929    
RegionSouth East                                           -1.969  0.04912 *  
RegionSouth West                                           -0.261  0.79423    
RegionWales                                                 0.434  0.66427    
RegionWest Midlands                                         0.644  0.51984    
RegionYorkshire and the Humber                              1.156  0.24782    
Education_BandLow                                          -0.758  0.44861    
Education_BandMid                                          -1.044  0.29656    
Income_GroupNot low                                         1.361  0.17361    
OutsourcedNonOLgroup 2 agency and long term                 1.017  0.30951    
OutsourcedNonOLgroup 3 5 or 6 indicators and long term     -1.548  0.12172    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5657 on 1660 degrees of freedom
  (127 observations deleted due to missingness)
Multiple R-squared:  0.03353,   Adjusted R-squared:  0.0184 
F-statistic: 2.215 on 26 and 1660 DF,  p-value: 0.0004154
Code for Nerds
summary(model_connections)

Call:
lm(formula = theme_connections ~ Sex + Ethnicity_Collapsed + 
    Age + Region + Education_Band + Income_Group + OutsourcedNonOL, 
    data = regression_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-1.37189 -0.18065 -0.03348  0.18793  1.37071 

Coefficients:
                                                           Estimate Std. Error
(Intercept)                                                0.131401   0.085582
SexFemale                                                 -0.001562   0.032414
SexOther                                                  -0.158853   0.636547
SexPrefer not to say                                       0.080487   0.636751
Ethnicity_CollapsedArab                                   -0.375341   0.241483
Ethnicity_CollapsedAsian/Asian British                    -0.030743   0.056381
Ethnicity_CollapsedBlack/ African/Caribbean/Black British  0.064017   0.049063
Ethnicity_CollapsedMixed/Multiple ethnic groups            0.159888   0.074017
Ethnicity_CollapsedOther ethnic group                      0.158933   0.320199
Ethnicity_CollapsedPrefer not to say                       0.051978   0.165719
Age                                                       -0.004481   0.001287
RegionEast of England                                     -0.029109   0.079071
RegionLondon                                               0.121226   0.067898
RegionNorth East                                           0.009858   0.094008
RegionNorth West                                           0.081234   0.071757
RegionNorthern Ireland                                     0.189611   0.129793
RegionScotland                                            -0.016985   0.086322
RegionSouth East                                           0.019902   0.071672
RegionSouth West                                           0.108455   0.081029
RegionWales                                                0.110079   0.097540
RegionWest Midlands                                        0.076778   0.078318
RegionYorkshire and the Humber                             0.060168   0.081921
Education_BandLow                                          0.009102   0.059797
Education_BandMid                                         -0.079423   0.036799
Income_GroupNot low                                        0.080348   0.039481
OutsourcedNonOLgroup 2 agency and long term               -0.030822   0.045180
OutsourcedNonOLgroup 3 5 or 6 indicators and long term     0.060366   0.055290
                                                          t value Pr(>|t|)    
(Intercept)                                                 1.535 0.124885    
SexFemale                                                  -0.048 0.961577    
SexOther                                                   -0.250 0.802964    
SexPrefer not to say                                        0.126 0.899429    
Ethnicity_CollapsedArab                                    -1.554 0.120305    
Ethnicity_CollapsedAsian/Asian British                     -0.545 0.585639    
Ethnicity_CollapsedBlack/ African/Caribbean/Black British   1.305 0.192150    
Ethnicity_CollapsedMixed/Multiple ethnic groups             2.160 0.030908 *  
Ethnicity_CollapsedOther ethnic group                       0.496 0.619712    
Ethnicity_CollapsedPrefer not to say                        0.314 0.753828    
Age                                                        -3.481 0.000513 ***
RegionEast of England                                      -0.368 0.712824    
RegionLondon                                                1.785 0.074380 .  
RegionNorth East                                            0.105 0.916501    
RegionNorth West                                            1.132 0.257773    
RegionNorthern Ireland                                      1.461 0.144246    
RegionScotland                                             -0.197 0.844040    
RegionSouth East                                            0.278 0.781292    
RegionSouth West                                            1.338 0.180931    
RegionWales                                                 1.129 0.259255    
RegionWest Midlands                                         0.980 0.327067    
RegionYorkshire and the Humber                              0.734 0.462772    
Education_BandLow                                           0.152 0.879042    
Education_BandMid                                          -2.158 0.031055 *  
Income_GroupNot low                                         2.035 0.042004 *  
OutsourcedNonOLgroup 2 agency and long term                -0.682 0.495211    
OutsourcedNonOLgroup 3 5 or 6 indicators and long term      1.092 0.275079    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.6336 on 1612 degrees of freedom
  (175 observations deleted due to missingness)
Multiple R-squared:  0.03345,   Adjusted R-squared:  0.01786 
F-statistic: 2.146 on 26 and 1612 DF,  p-value: 0.0007046
Code for Nerds
# Create a function to extract coefficients from all models
extract_coefficients <- function(model, theme_name) {
  coef_data <- broom::tidy(model, conf.int = TRUE) %>%
    filter(term != "(Intercept)") %>%
    mutate(theme = theme_name)
  return(coef_data)
}

# Combine coefficients from all models
all_coefficients <- bind_rows(
  extract_coefficients(model_secure, "Secure work and incomes"),
  extract_coefficients(model_terms, "Terms and conditions"),
  extract_coefficients(model_progression, "Progression and opportunity"),
  extract_coefficients(model_enforcement, "Enforcement"),
  extract_coefficients(model_connections, "Connections and relationships")
)

# Visualize regression coefficients
coefficient_plot <- ggplot(all_coefficients, 
                         aes(x = term, y = estimate, color = theme)) +
  geom_point(position = position_dodge(width = 0.5), size = 3) +
  geom_errorbar(aes(ymin = conf.low, ymax = conf.high),
                position = position_dodge(width = 0.5),
                width = 0.2) +
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  coord_flip() +
  facet_wrap(~ theme) +
  labs(
    title = "Effect of Demographics on Theme Scores",
    subtitle = "Regression coefficients with 95% confidence intervals",
    x = NULL,
    y = "Estimated Effect",
    caption = "Reference categories: Sex = Male, Ethnicity = White"
  ) +
  theme_minimal() +
  theme(
    legend.position = "none",
    strip.text = element_text(face = "bold"),
    panel.grid.minor = element_blank()
  )

# Display the coefficient plot
coefficient_plot

Code for Nerds
# Alternative visualization: Forest plot of all coefficients
forest_plot <- ggplot(all_coefficients, 
                     aes(x = estimate, y = interaction(theme, term), 
                        xmin = conf.low, xmax = conf.high, color = theme)) +
  geom_point(size = 2) +
  geom_errorbarh(height = 0.2) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "black") +
  labs(
    title = "Demographic Effects on Work Experience Themes",
    subtitle = "Regression coefficients with 95% confidence intervals",
    x = "Estimated Effect",
    y = NULL,
    caption = "Reference categories: Sex = Male, Ethnicity = White"
  ) +
  theme_minimal() +
  theme(
    legend.position = "right",
    panel.grid.minor = element_blank()
  )

# Display the forest plot
forest_plot

Person Level Approach

Here we go for an approach outlined in our email exchange, directly looking at how many people responded with negatively (-1s) across the selected variables.

The story remains largely the same. A very significant minority report 0 negative responses to these questions (40%). Only 17.4% reported 5 or more negative responses (of a possible 14)

Code for Nerds
## check that this code correctly removes NA's e.g. that the total is not affected by NA's
person_level <- data_processed %>%
  mutate(
    total = rowSums(select(., contains("_numeric")), na.rm = TRUE),
    num_neg = rowSums(select(., contains("_numeric")) == -1, na.rm = TRUE)
  )


library(ggplot2)

ggplot(person_level, aes(x = total)) +
  geom_histogram(binwidth = 5, fill = "steelblue", color = "black", alpha = 0.7) +
  labs(title = "Distribution of Total Scores", x = "Total Score", y = "Count") +
  theme_minimal()

Code for Nerds
ggplot(person_level, aes(x = num_neg)) +
  geom_histogram(binwidth = 1, fill = "red", color = "black", alpha = 0.7) +
  labs(title = "Distribution of -1 Responses per Person", 
       x = "Number of -1 Responses", 
       y = "Count of People") +
  theme_minimal()

Code for Nerds
ggplot(person_level, aes(x = num_neg)) +
  geom_density(fill = "red", alpha = 0.5) +
  labs(title = "Density Plot of -1 Responses per Person", 
       x = "Number of -1 Responses", 
       y = "Density") +
  theme_minimal()

Code for Nerds
library(dplyr)
library(ggplot2)

#Categorize people based on the number of -1 responses
person_level <- person_level %>%
  mutate(neg_category = case_when(
    num_neg == 0 ~ "0",
    num_neg == 1 ~ "1",
    num_neg == 2 ~ "2",
    num_neg == 3 ~ "3",
    num_neg == 4 ~ "4",
    num_neg >= 5 ~ "5+"
  ))

# Count and calculate percentages
neg_counts <- person_level %>%
  count(neg_category) %>%
  mutate(percent = (n / sum(n)) * 100)

# Create the bar plot
ggplot(neg_counts, aes(x = neg_category, y = percent, fill = neg_category)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  geom_text(aes(label = paste0(round(percent, 1), "%")), vjust = -0.5) +
  labs(title = "Percentage of People Reporting Negative Responses",
       x = "Number of Negative Responses (-1s)",
       y = "Percentage of People") +
  scale_fill_brewer(palette = "Reds") +
  theme_minimal()

Could you send the crosstabs for this analysis by the key demographics (pay, ethnicity, sex, migration), so we can see whether some groups report higher rates of multiple negative outcomes than others?

Code for Nerds
library(crosstable)
library(flextable)


# Create the crosstab by Income Group
Income_Crosstab <- crosstable(person_level %>% 
                               select(neg_category, num_neg, Income_Group), 
                             by = Income_Group,
                             total = "both",
                             showNA = "no", 
                             funs = c(median, mean, "std dev" = sd),
                             percent_digits = 2, 
                             percent_pattern = "{n} ({p_row})") %>%
   as_flextable()

Income_Crosstab

label

variable

Income_Group

Total

Low

Not low

neg_category

0

151 (22.98%)

506 (77.02%)

657 (37.69%)

1

56 (24.56%)

172 (75.44%)

228 (13.08%)

2

23 (15.33%)

127 (84.67%)

150 (8.61%)

3

27 (17.20%)

130 (82.80%)

157 (9.01%)

4

38 (29.46%)

91 (70.54%)

129 (7.40%)

5+

98 (23.22%)

324 (76.78%)

422 (24.21%)

Total

393 (22.55%)

1350 (77.45%)

1743 (100.00%)

num_neg

median

1.0

1.0

1.0

mean

2.6

2.6

2.6

std dev

3.0

2.9

3.0

Code for Nerds
# create a crosstab by ethnicity

Ethnicity_Crosstab <- crosstable(person_level %>% 
                               select(neg_category, num_neg, Ethnicity_Collapsed), 
                             by = Ethnicity_Collapsed,
                             total = "both",
                             showNA = "no", 
                             funs = c(median, mean, "std dev" = sd),
                             percent_digits = 2, 
                             percent_pattern = "{n} ({p_row})") %>%
   as_flextable()

Ethnicity_Crosstab

label

variable

Ethnicity_Collapsed

Total

Arab

Asian/Asian British

Black/ African/Caribbean/Black British

Mixed/Multiple ethnic groups

Other ethnic group

Prefer not to say

White

neg_category

0

4 (0.57%)

46 (6.58%)

89 (12.73%)

40 (5.72%)

2 (0.29%)

11 (1.57%)

507 (72.53%)

699 (38.53%)

1

0 (0%)

24 (9.96%)

33 (13.69%)

9 (3.73%)

1 (0.41%)

2 (0.83%)

172 (71.37%)

241 (13.29%)

2

3 (1.97%)

17 (11.18%)

26 (17.11%)

5 (3.29%)

0 (0%)

1 (0.66%)

100 (65.79%)

152 (8.38%)

3

0 (0%)

18 (11.25%)

21 (13.12%)

5 (3.12%)

0 (0%)

3 (1.88%)

113 (70.62%)

160 (8.82%)

4

1 (0.75%)

14 (10.45%)

11 (8.21%)

8 (5.97%)

1 (0.75%)

3 (2.24%)

96 (71.64%)

134 (7.39%)

5+

1 (0.23%)

50 (11.68%)

55 (12.85%)

24 (5.61%)

1 (0.23%)

5 (1.17%)

292 (68.22%)

428 (23.59%)

Total

9 (0.50%)

169 (9.32%)

235 (12.95%)

91 (5.02%)

5 (0.28%)

25 (1.38%)

1280 (70.56%)

1814 (100.00%)

num_neg

median

2.0

2.0

1.0

1.0

1.0

1.0

1.0

1.0

mean

2.0

3.0

2.5

2.6

2.2

2.4

2.5

2.5

std dev

2.6

3.0

3.0

3.2

2.7

2.9

2.9

2.9

Code for Nerds
#sex crosstab
Sex_Crosstab <- crosstable(person_level %>% 
                               select(neg_category, num_neg, Sex), 
                             by = Sex,
                             total = "both",
                             showNA = "no", 
                             funs = c(median, mean, "std dev" = sd),
                             percent_digits = 2, 
                             percent_pattern = "{n} ({p_row})") %>%
   as_flextable()

Sex_Crosstab

label

variable

Sex

Total

Female

Male

Other

Prefer not to say

neg_category

0

319 (45.64%)

378 (54.08%)

0 (0%)

2 (0.29%)

699 (38.53%)

1

105 (43.57%)

135 (56.02%)

1 (0.41%)

0 (0%)

241 (13.29%)

2

68 (44.74%)

84 (55.26%)

0 (0%)

0 (0%)

152 (8.38%)

3

67 (41.88%)

92 (57.50%)

0 (0%)

1 (0.62%)

160 (8.82%)

4

56 (41.79%)

78 (58.21%)

0 (0%)

0 (0%)

134 (7.39%)

5+

155 (36.21%)

273 (63.79%)

0 (0%)

0 (0%)

428 (23.59%)

Total

770 (42.45%)

1040 (57.33%)

1 (0.06%)

3 (0.17%)

1814 (100.00%)

num_neg

median

1.0

2.0

1.0

0

1.0

mean

2.2

2.7

1.0

1.0

2.5

std dev

2.8

3.1

NA

1.7

2.9

Code for Nerds
# Migrations

Migration_Crosstab <- crosstable(person_level %>% 
                               select(neg_category, num_neg, BORNUK), 
                             by = BORNUK,
                             total = "both",
                             showNA = "no", 
                             funs = c(median, mean, "std dev" = sd),
                             percent_digits = 2, 
                             percent_pattern = "{n} ({p_row})") %>%
   as_flextable()

Migration_Crosstab

label

variable

BORNUK

Total

I was born in the UK

More than 30 years ago

Prefer not to say

Within the last 10 years

Within the last 15 years

Within the last 20 years

Within the last 3 years

Within the last 30 years

Within the last 5 years

Within the last year

neg_category

0

524 (74.96%)

13 (1.86%)

24 (3.43%)

23 (3.29%)

20 (2.86%)

20 (2.86%)

26 (3.72%)

8 (1.14%)

22 (3.15%)

19 (2.72%)

699 (38.53%)

1

184 (76.35%)

5 (2.07%)

9 (3.73%)

6 (2.49%)

6 (2.49%)

5 (2.07%)

12 (4.98%)

1 (0.41%)

5 (2.07%)

8 (3.32%)

241 (13.29%)

2

107 (70.39%)

4 (2.63%)

2 (1.32%)

8 (5.26%)

8 (5.26%)

5 (3.29%)

8 (5.26%)

1 (0.66%)

7 (4.61%)

2 (1.32%)

152 (8.38%)

3

102 (63.75%)

7 (4.38%)

0 (0%)

9 (5.62%)

9 (5.62%)

3 (1.88%)

6 (3.75%)

4 (2.50%)

13 (8.12%)

7 (4.38%)

160 (8.82%)

4

93 (69.40%)

4 (2.99%)

2 (1.49%)

7 (5.22%)

8 (5.97%)

3 (2.24%)

5 (3.73%)

2 (1.49%)

6 (4.48%)

4 (2.99%)

134 (7.39%)

5+

254 (59.35%)

6 (1.40%)

4 (0.93%)

27 (6.31%)

22 (5.14%)

16 (3.74%)

42 (9.81%)

6 (1.40%)

27 (6.31%)

24 (5.61%)

428 (23.59%)

Total

1264 (69.68%)

39 (2.15%)

41 (2.26%)

80 (4.41%)

73 (4.02%)

52 (2.87%)

99 (5.46%)

22 (1.21%)

80 (4.41%)

64 (3.53%)

1814 (100.00%)

num_neg

median

1.0

2.0

0

3.0

3.0

2.0

3.0

3.0

3.0

3.0

1.0

mean

2.3

2.4

1.2

3.0

3.3

3.0

3.6

3.1

3.5

3.3

2.5

std dev

2.9

2.5

2.2

2.6

3.0

3.5

3.3

3.3

3.2

3.1

2.9

Could you replicate this just for the people who say they are paid less: so, what % of this group who report being paid less than if they were in-house also says yes to 1,2,3,4,5+ negative outcomes?

Wow! 307 people selected that they get paid less and it should be noted that this is a small proportion of the total sample. However, for the people who reported being paid less - every single one reported at least one other negative outcome. And 70 reported 5 or more.

Code for Nerds
library(dplyr)
library(ggplot2)

# Filter for people paid less and create negative categories
paid_less_group <- person_level %>%
  filter(Pros_And_Cons_Pay == 'I get paid less') %>%
  count(neg_category) %>%
  mutate(percent = (n / sum(n)) * 100)

# Create the bar plot for paid less group
ggplot(paid_less_group, aes(x = neg_category, y = percent, fill = neg_category)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  geom_text(aes(label = paste0(round(percent, 1), "%")), vjust = -0.5) +
  labs(title = "Percentage of People Paid Less Reporting Negative Responses",
       x = "Number of Negative Responses (-1s)",
       y = "Percentage of People Paid Less") +
  scale_fill_brewer(palette = "Reds") +
  theme_minimal()

Could you also replicate this first bit of analysis, but just focusing on the % of workers who say yes to 1, 2, 3, 4, 5+ “negatives” in the three sub-groups of “negative variables” I think are most important (below) – I think this would allow us to talk about overlaps between the most serious kinds of harm, because these represent really core elements of people’s terms and conditions:

Here are the results looking only at the three specified areas.  

Code for Nerds
## check that this code correctly removes NA's e.g. that the total is not affected by NA's
person_level_secure_terms_enforcement <- data_processed %>%
  select(Pros_And_Cons_Pay_numeric,
         Pros_And_Cons_Hours_numeric,
         Pros_And_Cons_Security_numeric,
         Pros_And_Cons_Holiday_numeric,
         Pros_And_Cons_Flexibility_numeric,
         Pros_And_Cons_Terms_numeric,
         Pros_And_Cons_HealthSafety_numeric,
         Pros_And_Cons_Rights_numeric
         ) %>% 
  filter(rowSums(is.na(.)) < ncol(.)) %>%
  mutate(
    num_neg = rowSums(. == -1, na.rm = TRUE)
  ) %>%
  mutate(neg_category = case_when(
    num_neg == 0 ~ "0",
    num_neg == 1 ~ "1",
    num_neg == 2 ~ "2",
    num_neg == 3 ~ "3",
    num_neg == 4 ~ "4",
    num_neg >= 5 ~ "5+"
  )) %>%
  count(neg_category) %>%
  mutate(percent = (n / sum(n)) * 100)
  

library(dplyr)
library(ggplot2)

# Create the bar plot
ggplot(person_level_secure_terms_enforcement, aes(x = neg_category, y = percent, fill = neg_category)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  geom_text(aes(label = paste0(round(percent, 1), "%")), vjust = -0.5) +
  labs(title = "Percentage of People Reporting Negative Responses within the Themes of Security, Terms and Enforcement",
       x = "Number of Negative Responses (-1s)",
       y = "Percentage of People") +
  scale_fill_brewer(palette = "Reds") +
  theme_minimal()

Could you also replicate this just for the 3 “secure work and incomes” variables – so what % of workers report negatives on 1, 2 or all 3 of these variables?

Bare in mind here that the maximum is 3 i.e. where people gave a negative response to all three of the constituent variables.

Code for Nerds
# Analyze negative responses for secure work and income variables
# Calculate the percentage of negative responses across Pay, Hours, and Security variables
secure_work_incomes_group <- person_level %>%
  select(Pros_And_Cons_Pay_numeric, Pros_And_Cons_Hours_numeric, Pros_And_Cons_Security_numeric) %>%
  # Remove rows where all selected columns are NA
  filter(rowSums(is.na(.)) < ncol(.)) %>%
  mutate(
    num_neg = rowSums(. == -1, na.rm = TRUE)
  ) %>%
  mutate(neg_category = case_when(
    num_neg == 0 ~ "0",
    num_neg == 1 ~ "1",
    num_neg == 2 ~ "2",
    num_neg == 3 ~ "3",
    num_neg == 4 ~ "4",
    num_neg >= 5 ~ "5+"
  )) %>%
  count(neg_category) %>%
  mutate(percent = (n / sum(n)) * 100)

# Visualize the distribution of negative responses for secure work and income variables
ggplot(secure_work_incomes_group, aes(x = neg_category, y = percent, fill = neg_category)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  geom_text(aes(label = paste0(round(percent, 1), "%")), vjust = -0.5) +
  labs(title = "Distribution of Negative Responses on Secure Work and Income Variables",
       x = "Number of Negative Responses (-1s)",
       y = "Percentage of Respondents") +
  scale_fill_brewer(palette = "Reds") +
  theme_minimal()

Could you do the same basic analysis for this “entitlements” question – so the % of people who said yes to 1, 2, 3, 4, 5+ etc. negative variables – i.e. going without a payslip / being paid late / not being paid for holiday they’re entitled to, etc.

I dont understand what you are asking for here. What is the ‘basic analysis’ you are refering to? and which specific ‘entitlements’ variables.

  • I mean could you look at the people who say yes to 1,2,3,4,5+ of the variables for the question “Some workers don’t receive everything that they are entitled to from their employer. In your current role, have you experienced any of the following – please tick all that apply.” So that we can look at how many workers report experiencing going without multiple entitlements in their current role.

    This plot ignores where people have given a response of None

Code for Nerds
# Create a custom function to recode experiences
# Regular rights violations are coded as -1 (negative)
# "None" responses are coded as +1 (positive)
# NA values coded as 0 (not experienced)
recode_rights_experiences <- function(x, is_none_column = FALSE) {
  if (is_none_column) {
    # For RightsViolations_None column
    case_when(
      is.na(x) ~ 0,          # NA means not answered
      TRUE ~ 1               # "None" response is positive (+1)
    )
  } else {
    # For all other rights columns
    case_when(
      is.na(x) ~ 0,          # NA means not experienced (0)
      TRUE ~ -1              # Any non-NA value means negative experience (-1)
    )
  }
}

# Process the data - create numeric columns for rights variables
rights_data <- data %>%
  # Recode regular rights violations
  mutate(across(starts_with("Rights") & !contains("None"), 
                ~ recode_rights_experiences(as.character(.)),
                .names = "{.col}_numeric")) %>%
  # Special handling for None column
  mutate(RightsViolations_None_numeric = 
           recode_rights_experiences(RightsViolations_None, is_none_column = TRUE)) %>%
  # Select both original and numeric columns plus demographic variables
  select(
    starts_with("Rights"),
    contains("_numeric"),
    Sex, Age, Ethnicity, Ethnicity_Collapsed, Region, Education_Band,
    Annual_Income, Income_Group, OutsourcedNonOL, BORNUK
  )

# Calculate negative experiences count and categorize
rights_summary <- rights_data %>%
  # Select only the violation columns, excluding the "None" column
  select(contains("_numeric") & !contains("None_numeric")) %>% 
  # Remove rows where all rights variables are NA
  filter(rowSums(is.na(.)) < ncol(.)) %>%
  # Count negative responses per person
  mutate(
    num_neg = rowSums(. == -1, na.rm = TRUE)
  ) %>%
  # Categorize by number of negative experiences
  mutate(neg_category = case_when(
    num_neg == 0 ~ "0",
    num_neg == 1 ~ "1",
    num_neg == 2 ~ "2",
    num_neg == 3 ~ "3",
    num_neg == 4 ~ "4",
    num_neg >= 5 ~ "5+"
  )) %>%
  # Count respondents in each category and calculate percentages
  count(neg_category) %>%
  mutate(percent = (n / sum(n)) * 100)

# Visualize the distribution of negative rights experiences
ggplot(rights_summary, aes(x = neg_category, y = percent, fill = neg_category)) +
  geom_bar(stat = "identity", alpha = 0.8) +
  geom_text(aes(label = paste0(round(percent, 1), "%")), vjust = -0.5) +
  labs(title = "Distribution of Negative Responses on Rights",
       x = "Number of Negative Responses (-1s)",
       y = "Percentage of Respondents") +
  scale_fill_brewer(palette = "Reds") +
  theme_minimal()

Additionally investigate the most common violation

Here we include ‘None’ responses

Code for Nerds
# Calculate the frequency of both positive and negative responses
rights_frequencies_with_none <- rights_data %>%
  # Select all numeric columns for rights
  select(contains("_numeric")) %>%
  # Convert to long format for easier analysis
  pivot_longer(
    cols = everything(),
    names_to = "right_type",
    values_to = "value"
  ) %>%
  # Clean up the variable names for display
  mutate(
    right_type = str_replace(right_type, "Rights_", ""),
    right_type = str_replace(right_type, "Violations_", ""),
    right_type = str_replace(right_type, "_numeric", ""),
    # Create a response type column to distinguish positive/negative
    response_type = case_when(
      value == 1 ~ "None (No violations)",
      value == -1 ~ "Violation reported",
      TRUE ~ "No response"
    )
  ) %>%
  # Filter to include only meaningful responses (exclude zeros)
  filter(value != 0) %>%
  # Count by both right type and response type
  count(right_type, response_type, value) %>%
  # Calculate percentages based on total respondents
  mutate(
    percent = (n / nrow(rights_data)) * 100
  )

# Alternative stacked bar chart
ggplot(rights_frequencies_with_none, 
       aes(x = reorder(right_type, n), 
           y = percent,
           fill = response_type)) +
  geom_bar(stat = "identity", position = "stack", alpha = 0.8) +
  geom_text(aes(label = paste0(round(percent, 1), "%")),
            position = position_stack(vjust = 0.5)) +
  labs(
    title = "Distribution of Responses for Each Rights Issue",
    x = "Type of Rights Issue",
    y = "Percentage of Respondents",
    fill = "Response Type"
  ) +
  scale_fill_manual(values = c("None (No violations)" = "forestgreen", 
                              "Violation reported" = "darkred",
                              "No response" = "gray80")) +
  coord_flip() +
  theme_minimal() +
  theme(
    axis.text.y = element_text(size = 10),
    legend.position = "bottom"
  )

Could you also send the crosstabs for this by the key demographics (pay, ethnicity, sex, migration), so again we can see whether some groups report higher rates of multiple failures to receive their entitlements than others

TBC

On the crosstabs, which it would be great to have for both of these questions, could they be flipped? So instead of showing the proportion of respondents saying yes to each variable who are in X group, instead showing the proportion of respondents in each group who say yes to each variable? I.e. instead of seeing that 23.22% of the people who said yes to 5+ negatives were in the low pay group, could we see the % of the low pay group who said yes to 5+ negatives – for each of these demographic cross tabs?

Code for Nerds
library(crosstable)
library(flextable)


# Create the crosstab by Income Group
Reverse_Income_Crosstab <- crosstable(person_level %>% 
                               select(neg_category, Income_Group), 
                             by = neg_category,
                             total = "both",
                             showNA = "no", 
                             funs = c(median, mean, "std dev" = sd),
                             percent_digits = 2, 
                             percent_pattern = "{n} ({p_row})") %>%
   as_flextable()

Reverse_Income_Crosstab

label

variable

neg_category

Total

0

1

2

3

4

5+

Income_Group

Low

151 (38.42%)

56 (14.25%)

23 (5.85%)

27 (6.87%)

38 (9.67%)

98 (24.94%)

393 (22.55%)

Not low

506 (37.48%)

172 (12.74%)

127 (9.41%)

130 (9.63%)

91 (6.74%)

324 (24.00%)

1350 (77.45%)

Total

657 (37.69%)

228 (13.08%)

150 (8.61%)

157 (9.01%)

129 (7.40%)

422 (24.21%)

1743 (100.00%)

Code for Nerds
# create a crosstab by ethnicity

Reverse_Ethnicity_Crosstab <- crosstable(person_level %>% 
                               select(neg_category, Ethnicity_Collapsed), 
                             by = neg_category,
                             total = "both",
                             showNA = "no", 
                             funs = c(median, mean, "std dev" = sd),
                             percent_digits = 2, 
                             percent_pattern = "{n} ({p_row})") %>%
   as_flextable()

Reverse_Ethnicity_Crosstab

label

variable

neg_category

Total

0

1

2

3

4

5+

Ethnicity_Collapsed

Arab

4 (44.44%)

0 (0%)

3 (33.33%)

0 (0%)

1 (11.11%)

1 (11.11%)

9 (0.50%)

Asian/Asian British

46 (27.22%)

24 (14.20%)

17 (10.06%)

18 (10.65%)

14 (8.28%)

50 (29.59%)

169 (9.32%)

Black/ African/Caribbean/Black British

89 (37.87%)

33 (14.04%)

26 (11.06%)

21 (8.94%)

11 (4.68%)

55 (23.40%)

235 (12.95%)

Mixed/Multiple ethnic groups

40 (43.96%)

9 (9.89%)

5 (5.49%)

5 (5.49%)

8 (8.79%)

24 (26.37%)

91 (5.02%)

Other ethnic group

2 (40.00%)

1 (20.00%)

0 (0%)

0 (0%)

1 (20.00%)

1 (20.00%)

5 (0.28%)

Prefer not to say

11 (44.00%)

2 (8.00%)

1 (4.00%)

3 (12.00%)

3 (12.00%)

5 (20.00%)

25 (1.38%)

White

507 (39.61%)

172 (13.44%)

100 (7.81%)

113 (8.83%)

96 (7.50%)

292 (22.81%)

1280 (70.56%)

Total

699 (38.53%)

241 (13.29%)

152 (8.38%)

160 (8.82%)

134 (7.39%)

428 (23.59%)

1814 (100.00%)

Code for Nerds
#sex crosstab
Reverse_Sex_Crosstab <- crosstable(person_level %>% 
                               select(neg_category, Sex), 
                             by = neg_category,
                             total = "both",
                             showNA = "no", 
                             funs = c(median, mean, "std dev" = sd),
                             percent_digits = 2, 
                             percent_pattern = "{n} ({p_row})") %>%
   as_flextable()

Reverse_Sex_Crosstab

label

variable

neg_category

Total

0

1

2

3

4

5+

Sex

Female

319 (41.43%)

105 (13.64%)

68 (8.83%)

67 (8.70%)

56 (7.27%)

155 (20.13%)

770 (42.45%)

Male

378 (36.35%)

135 (12.98%)

84 (8.08%)

92 (8.85%)

78 (7.50%)

273 (26.25%)

1040 (57.33%)

Other

0 (0%)

1 (100.00%)

0 (0%)

0 (0%)

0 (0%)

0 (0%)

1 (0.06%)

Prefer not to say

2 (66.67%)

0 (0%)

0 (0%)

1 (33.33%)

0 (0%)

0 (0%)

3 (0.17%)

Total

699 (38.53%)

241 (13.29%)

152 (8.38%)

160 (8.82%)

134 (7.39%)

428 (23.59%)

1814 (100.00%)